#####
# Replication for: "Can Political Speech Foster Tolerance of Immigrants?" by Schleiter, Tavits, and Ward.
# Table S.15
#####

library(here)
library(data.table)
library(xtable)

# source the custom functions
source("functions.R")

# load the pooled data if not already in workspace
if(!exists("pooled")){
  pooled <- fread(here("data", "pooled.csv"))
}

# Create the trichotomous news indicator
pooled[ , news3 := factor(
  fcase(
    news2 < 6, "Low",
    news2 == 6, "Medium",
    news2 == 7, "High"
  ), levels = c("Low", "Medium", "High")
)]


# Helper functions --------------------------------------------------------

# These three functions do the starch of the analysis. The first fits all 9 models, the second extracts the 9 marginal effects per model. The third does the battery of p-values
allCovNewsfits <- 
  function(y, samp, data = pooled, chatty = T){
    if(samp != "Replication"){
      form <- as.formula(paste0(y, "~ CH + NM + CS + CH*news3 + NM*news3 + CS*news3 + female + age + I(age^2) + race2 + pid2 + edu2 + region2 + factor(wave)"))
    } else {
      form <- as.formula(paste0(y, "~ CH + NM + CS + CH*news3 + NM*news3 + CS*news3 + female + age + I(age^2) + race2 + pid2 + edu2 + region2"))
    }
    
    sample_use <- c("Main" = "replication == 0", "Replication" = "replication == 1", "Pooled" = "")[samp]
    
    if(samp != "Pooled"){
      mod <- lm(formula = form, data = data[eval(parse(text = sample_use))])
    } else {
      mod <- lm(formula = form, data = data)
    }
    
    if(chatty) print(summary(mod))
    return(mod)
  }

news_ME_extract <- 
  function(mod, level = .95){
    out <- expand.grid("treatment" = c("CH", "NM", "CS"), "news" = c("L", "M", "H"), stringsAsFactors = F); setDT(out)
    
    out[news == "L" , c("est", "lower", "upper") := .(
      coef(mod)[c("CH", "NM", "CS")],
      confint(mod, level = level)[c("CH", "NM", "CS"), 1],
      confint(mod, level = level)[c("CH", "NM", "CS"), 2]
    )]
    
    for(n in 4:9){
      vars <- c(out$treatment[n], paste0(out$treatment[n], ":news3", ifelse(out$news[n] == "M", "Medium", "High")))
      est <- sum(coef(mod)[vars])
      se <- sqrt(vcov(mod)[vars[1], vars[1]] + vcov(mod)[vars[2], vars[2]] + 2*vcov(mod)[vars[1], vars[2]])
      set(out, i = n, j = "est", value = est)
      set(out, i = n, j = "lower", value = est - 1.96*se)
      set(out, i = n, j = "upper", value = est + 1.96*se)
    }
    
    return(out)
  }

# p-values that I want:
# for each one, compare that to 0 (this is easy for I, hard for others)
# For each, compare I to D (just the signif. of the interaction)
# for each, compare I to R (just the signif. of the interaction)
# For each, compare D to R (test of significant differences between the interaction terms)

news_pval_extract <-
  function(mod){
    out <- expand.grid("treatment" = c("CH", "NM", "CS"), "test" = c("L_v_0", "M_v_0", "H_v_0", "L_v_M", "L_v_H", "M_v_H"), stringsAsFactors = F); setDT(out)
    
    # knock out the ones that aren't marginal effects
    out[test == "L_v_0", p := summary(mod)$coefficients[treatment, 4]]
    out[test == "L_v_M", p := summary(mod)$coefficients[paste0(treatment, ":news3Medium"), 4]]
    out[test == "L_v_H", p := summary(mod)$coefficients[paste0(treatment, ":news3High"), 4]]
    
    # D_v_0 and R_v_0 are easy enough
    for(n in which(out$test %in% c("M_v_0", "H_v_0"))){
      vars <- c(out$treatment[n], paste0(out$treatment[n], ":news3", ifelse(out$test[n] == "M_v_0", "Medium", "High")))
      est <- sum(coef(mod)[vars])
      se <- sqrt(vcov(mod)[vars[1], vars[1]] + vcov(mod)[vars[2], vars[2]] + 2*vcov(mod)[vars[1], vars[2]])
      set(out, i = n, j = "p", value = 2*(1-pnorm(abs(est/se))))
    }
    
    # D_v_R is test of significant difference between the interaction terms
    for(n in which(out$test %in% "M_v_H")){
      vars <- paste0(out$treatment[n], c(":news3Medium", ":news3High"))
      top <- coef(mod)[vars[1]] - coef(mod)[vars[2]]
      se <- sqrt(vcov(mod)[vars[1], vars[1]] + vcov(mod)[vars[2], vars[2]] - 2*vcov(mod)[vars[1], vars[2]])
      set(out, i = n, j = "p", value = 2*(1 - pnorm(abs(top/se))))
    }
    
    return(out)
  }

# Fitting -----------------------------------------------------------------


news_fits <- expand.grid("y" = c("immPCA", "imm_neighbors2", "imm_increase2"), "samp" = c("Main", "Replication", "Pooled")); setDT(news_fits)

news_fits[ , fit := Map(allCovNewsfits, y, samp, chatty = F)]
news_me<- news_fits[ , news_ME_extract(mod = fit[[1]]), by = .(y, samp)]
news_p <- news_fits[ , news_pval_extract(fit[[1]]), by = .(y, samp)]

# Reporting ---------------------------------------------------------------

p_print <- dcast(news_p, samp + y + treatment ~ test, value.var = "p")

p_print[ , y := fcase(
  y == "immPCA", "Index",
  y == "imm_neighbors2", "Neighbors",
  y == "imm_increase2", "Increase"
)]

p_print[ , treatment := fcase(
  treatment == "NM", "Norms",
  treatment == "CH", "Common Humanity",
  treatment == "CS", "Countering Stereotypes"
)]

p_print <- p_print[ , lapply(.SD, function(col){
  if(is.numeric(col)){
    return(ifelse(col < 0.05, paste0("\\textbf{", round(col, 2), "}"), round(col, 2)))
  } else {
    return(col)
  }
})]

print(
  xtable(
    p_print[ , .(y, treatment, L_v_0, M_v_0, H_v_0, L_v_M, L_v_H, M_v_H)],
    caption = "P-values from News Consumption Heterogeneity Analysis",
    align = rep(c("l", "c"), times = c(3, 6))
  ), 
  include.rownames = FALSE,
  booktabs = T,
  sanitize.text.function = function(x){x}
)
